****************************
* Textual Analysis
****************************

* Build review-level text measures from the raw review file and save them
* as "comments classification.dta" for the later merge with transactions.
use "AliExpressReviews.dta", clear

* --- positive vs negative ---
* Share of positive / negative words in each review. Reviews with zero
* words get a ratio of 0 instead of the missing value 0/0 would produce.
gen positive_ratio = cond(words == 0, 0, positive_words/words)
gen negative_ratio = cond(words == 0, 0, negative_words/words)

* --- general vs specific ---
* Row totals over the attribute-level counts; rowtotal() treats missing
* components as 0 (it is the documented name of the older rsum()).
* NOTE(review): g*/b* presumably denote positive/negative mentions - confirm
* against the data dictionary.
egen gspecific = rowtotal(gqual gserv gship gmaterial gsize gdesign gcolor)
egen bspecific = rowtotal(bqual bserv bship bmaterial bsize bdesign bcolor)

gen gsum = ggeneral + gspecific
gen bsum = bgeneral + bspecific

* Indicators for "at least one mention". Stata orders missing values above
* every number, so a bare `gsum > 0` would also flag reviews whose count is
* missing; those are left missing here instead of being coded as 1.
gen gdummy = 0 if gsum == 0
replace gdummy = 1 if gsum > 0 & !missing(gsum)

gen bdummy = 0 if bsum == 0
replace bdummy = 1 if bsum > 0 & !missing(bsum)

* --- objective vs subjective ---
* Share of adjectives / nouns among all words, again 0 for empty reviews.
gen adj_ratio = cond(words == 0, 0, adjs/words)
gen nouns_ratio = cond(words == 0, 0, nouns/words)

* --- observable vs less observable attributes ---
egen gattributes_unob = rowtotal(gqual gmaterial)
egen battributes_unob = rowtotal(bqual bmaterial)

egen gattributes_ob = rowtotal(gcolor gdesign gship gserv)
egen battributes_ob = rowtotal(bcolor bdesign bship bserv)

* The filename contains a space, so it must be quoted; unquoted, Stata
* takes "comments" as the filename and rejects the remainder.
save "comments classification.dta", replace

* Attach the review-level text measures to the transaction records
* (one row per uniqueid on both sides).
use "transactions.dta", clear
merge 1:1 uniqueid using "comments classification.dta"

* Running time indices built from the clock variable `time`:
*   year  = calendar year minus 2014
*   week  = 52*year + week of the year
*   month = 12*year + month of the year
gen week = 52*(year(dofc(time)) - 2014) + week(dofc(time))
gen year = year(dofc(time)) - 2014
gen month = 12*year + month(dofc(time))
* Uncomment to aggregate by month instead of by week:
//replace week = month

* --- Aggregate to one row per productid-week ---
* review is the complement of the no_review flag.
gen review = 1 - no_review

* Parallel lists: the i-th source variable is summed within productid-week
* into the i-th target variable. Target names are spelled out because the
* suffix is not uniform (some end in "_sum", others in "sum").
local sources ///
    no_review review words positive_words negative_words ///
    gspecific bspecific ggeneral bgeneral gdummy bdummy ///
    adj_ratio nouns_ratio ///
    gattributes_unob battributes_unob gattributes_ob battributes_ob
local targets ///
    noreview_sum review_sum words_sum positive_wordssum negative_wordssum ///
    gspecific_sum bspecific_sum ggeneral_sum bgeneral_sum gdummy_sum bdummy_sum ///
    adj_ratiosum nouns_ratiosum ///
    gattributes_unobsum battributes_unobsum gattributes_obsum battributes_obsum

local n_measures : word count `sources'
forvalues i = 1/`n_measures' {
    local src : word `i' of `sources'
    local dst : word `i' of `targets'
    bysort productid week: egen `dst' = total(`src')
}

* Keep the first row of every productid-week cell, then only the keys and
* the variables whose names end in "sum" (this pattern also keeps gsum/bsum).
bysort productid week: keep if _n == 1
keep productid week *sum
save "comments analysis.dta", replace

* --- Merge the product-week text measures into the RDD file ---
set more off

* `clear` is the documented option for replacing the data in memory.
use "RDD.dta", clear
merge m:1 productid week using "comments analysis.dta"
drop _merge

* Rows without a value for a *sum variable after the merge get 0.
foreach var of varlist *sum {
	replace `var'=0 if `var'==.
}

* Declare the panel so that the lag operator L. can be used below.
tsset productid week

* Controls shared by all three specifications, kept in one place so the
* models cannot drift apart. Week dummies are included via i.week; seller
* fixed effects are absorbed and standard errors are clustered by product.
local controls materialquality fguapro i.freturnpolicy lnsize lnitem lnpic i.week

* (1) positive/negative word counts and adjective/noun ratios
eststo quan_comment1: qui areg lnweekquan lnwtranprice treated truerating L.noreview_sum L.positive_wordssum L.negative_wordssum L.adj_ratiosum L.nouns_ratiosum `controls', absorb(sellerid) vce(cl productid)

* (2) general vs specific mentions
eststo quan_comment2: qui areg lnweekquan lnwtranprice treated truerating L.noreview_sum L.gspecific_sum L.bspecific_sum L.ggeneral_sum L.bgeneral_sum L.nouns_ratiosum `controls', absorb(sellerid) vce(cl productid)

* (3) observable vs less observable attributes
eststo quan_comment3: qui areg lnweekquan lnwtranprice treated truerating L.noreview_sum L.gattributes_unobsum L.battributes_unobsum L.gattributes_obsum L.battributes_obsum L.nouns_ratiosum `controls', absorb(sellerid) vce(cl productid)

* Regression table; the week dummies are suppressed from the output.
esttab quan_comment*, drop(*.week) star(* 0.1 ** 0.05 *** 0.01) cells(b(star fmt(3)) se(par))  stats(r2 N) mti

* --- Figure 5 of the paper ---
* Re-estimates the positive/negative-words specification and plots selected
* coefficients with their 95% confidence intervals.

* `clear` is the documented option for replacing the data in memory.
use "RDD.dta", clear
merge m:1 productid week using "comments analysis.dta"
drop _merge
* Rows without a value for a *sum variable after the merge get 0.
foreach var of varlist *sum {
	replace `var'=0 if `var'==.
}

tsset productid week
eststo quan_comment4: qui areg lnweekquan lnwtranprice treated truerating L.noreview_sum L.positive_wordssum L.negative_wordssum L.adj_ratiosum L.nouns_ratiosum materialquality fguapro i.freturnpolicy lnsize lnitem lnpic i.week, absorb(sellerid) vce(cl productid)

esttab quan_comment4, drop(*.week) star(* 0.1 ** 0.05 *** 0.01) cells(b(star fmt(3)) se(par) ci)  stats(r2 N) mti

* parmest (user-written) saves the current estimates as a dataset with one
* row per parameter; parm, estimate, min95 and max95 are used below.
parmest, saving("comments_results.dta", replace)

use "comments_results.dta", clear
* The first 8 rows are the leading regressors, lnwtranprice through
* L.nouns_ratiosum, in the order they appear in the model.
keep in 1/8
* namen is the x-axis position of each plotted coefficient.
gen namen = 1 if strpos(parm,"treated")
replace namen = 2 if strpos(parm,"L.noreview_sum")
replace namen = 3 if strpos(parm,"positive_wordssum")
replace namen = 4 if strpos(parm,"negative_wordssum")
* One rcap (interval) plus one scatter (point estimate) per coefficient.
* The x-range now matches the 4.5 tick used in xlab and the other panels.
twoway (rcap min95 max95 namen if strpos(parm,"treated")) ///
        (scatter estimate namen if strpos(parm,"treated"), msymbol(T) msize(vlarge))  ///
        (rcap min95 max95 namen if strpos(parm,"L.noreview_sum")) ///
        (scatter estimate namen if strpos(parm,"L.noreview_sum"), msymbol(X) msize(vlarge))  ///
        (rcap min95 max95 namen if strpos(parm,"L.positive_wordssum")) ///
        (scatter estimate namen if strpos(parm,"L.positive_wordssum"), msymbol(S) msize(vlarge)) ///
        (rcap min95 max95 namen if strpos(parm,"L.negative_wordssum")) ///
        (scatter estimate namen if strpos(parm,"L.negative_wordssum"), msymbol(O) msize(vlarge)), ///
        legend(order(2 "rating" 4 "sales" 6 "positive word" 8 "negative word")) xscale(range(1 4.5)) yscale(r(-0.1, 0.3)) ///
        xlab(1 "rating" 2 "sales" 3 "positive word" 4 "negative word" 4.5 " ") xtitle("") ytitle("Estimated effect of information")


* Coefficient plot for the general-vs-specific specification: re-estimates
* the model and plots the four lagged mention counts with 95% intervals.

* `clear` is the documented option for replacing the data in memory.
use "RDD.dta", clear
merge m:1 productid week using "comments analysis.dta"
drop _merge
* Rows without a value for a *sum variable after the merge get 0.
foreach var of varlist *sum {
	replace `var'=0 if `var'==.
}

tsset productid week
* nouns_ratiosum enters lagged (L.), matching the table version of
* quan_comment2 and every other specification in this file.
* NOTE(review): the earlier plot code used the unlagged variable here -
* confirm against the published figure.
eststo quan_comment2: qui areg lnweekquan lnwtranprice treated truerating L.noreview_sum L.gspecific_sum L.bspecific_sum L.ggeneral_sum L.bgeneral_sum L.nouns_ratiosum materialquality fguapro i.freturnpolicy lnsize lnitem lnpic i.week, absorb(sellerid) vce(cl productid)

esttab quan_comment2, drop(*.week) star(* 0.1 ** 0.05 *** 0.01) cells(b(star fmt(3)) se(par) ci)  stats(r2 N) mti

* parmest (user-written) saves the current estimates as a dataset with one
* row per parameter; parm, estimate, min95 and max95 are used below.
parmest, saving("comments_results 2.dta", replace)

use "comments_results 2.dta", clear
* The first 11 rows are the leading regressors, lnwtranprice through fguapro.
keep in 1/11
* namen is the x-axis position of each plotted coefficient.
gen namen = 1 if strpos(parm,"L.ggeneral_sum")
replace namen = 2 if strpos(parm,"L.gspecific_sum")
replace namen = 3 if strpos(parm,"L.bgeneral_sum")
replace namen = 4 if strpos(parm,"L.bspecific_sum")
* One rcap (interval) plus one scatter (point estimate) per coefficient.
twoway (rcap min95 max95 namen if strpos(parm,"L.ggeneral_sum")) ///
        (scatter estimate namen if strpos(parm,"L.ggeneral_sum"), msymbol(T) msize(vlarge))  ///
        (rcap min95 max95 namen if strpos(parm,"L.gspecific_sum")) ///
        (scatter estimate namen if strpos(parm,"L.gspecific_sum"), msymbol(X) msize(vlarge))  ///
        (rcap min95 max95 namen if strpos(parm,"L.bgeneral_sum")) ///
        (scatter estimate namen if strpos(parm,"L.bgeneral_sum"), msymbol(S) msize(vlarge)) ///
        (rcap min95 max95 namen if strpos(parm,"L.bspecific_sum")) ///
        (scatter estimate namen if strpos(parm,"L.bspecific_sum"), msymbol(O) msize(vlarge)), ///
        legend(order(2 "General(+)" 4 "Specific (+)" 6 "General (-)" 8 "Specific (-)")) xscale(range(1 4.5)) yscale(r(-0.3, 0.1)) ///
        xlab(1 "General(+)" 2 "Specific (+)" 3 "General (-)" 4 "Specific (-)" 4.5 " ") xtitle("") ytitle("Estimated effect of information")

		
* Coefficient plot for the observable-vs-less-observable attribute
* specification: re-estimates the model and plots the four lagged attribute
* counts with 95% intervals.

* `clear` is the documented option for replacing the data in memory.
use "RDD.dta", clear
merge m:1 productid week using "comments analysis.dta"
drop _merge
* Rows without a value for a *sum variable after the merge get 0.
foreach var of varlist *sum {
	replace `var'=0 if `var'==.
}

tsset productid week
eststo quan_comment3: qui areg lnweekquan lnwtranprice treated truerating L.noreview_sum L.gattributes_unobsum L.battributes_unobsum L.gattributes_obsum L.battributes_obsum L.nouns_ratiosum materialquality fguapro i.freturnpolicy lnsize lnitem lnpic i.week, absorb(sellerid) vce(cl productid)

esttab quan_comment3, drop(*.week) star(* 0.1 ** 0.05 *** 0.01) cells(b(star fmt(3)) se(par) ci)  stats(r2 N) mti

* parmest (user-written) saves the current estimates as a dataset with one
* row per parameter; parm, estimate, min95 and max95 are used below.
parmest, saving("comments_results 3.dta", replace)

use "comments_results 3.dta", clear
* The first 12 rows are the leading regressors, lnwtranprice through the
* first level of freturnpolicy.
keep in 1/12
* namen is the x-axis position of each plotted coefficient.
gen namen = 1 if strpos(parm,"L.gattributes_obsum")
replace namen = 2 if strpos(parm,"L.gattributes_unobsum")
replace namen = 3 if strpos(parm,"L.battributes_obsum")
replace namen = 4 if strpos(parm,"L.battributes_unobsum")
* One rcap (interval) plus one scatter (point estimate) per coefficient.
twoway (rcap min95 max95 namen if strpos(parm,"L.gattributes_obsum")) ///
        (scatter estimate namen if strpos(parm,"L.gattributes_obsum"), msymbol(T) msize(vlarge))  ///
        (rcap min95 max95 namen if strpos(parm,"L.gattributes_unobsum")) ///
        (scatter estimate namen if strpos(parm,"L.gattributes_unobsum"), msymbol(X) msize(vlarge))  ///
        (rcap min95 max95 namen if strpos(parm,"L.battributes_obsum")) ///
        (scatter estimate namen if strpos(parm,"L.battributes_obsum"), msymbol(S) msize(vlarge)) ///
        (rcap min95 max95 namen if strpos(parm,"L.battributes_unobsum")) ///
        (scatter estimate namen if strpos(parm,"L.battributes_unobsum"), msymbol(O) msize(vlarge)), ///
        legend(order(2 "Ob. attributes(+)" 4 "Unob. attributes (+)" 6 "Ob. attributes (-)" 8 "Unob. attributes (-)")) xscale(range(1 4.5)) yscale(r(-0.4, 0.2)) ///
        xlab(1 "Ob. attributes(+)" 2 "Unob. attributes(+)" 3 "Ob. attributes(-)" 4 "Unob. attributes(-)" 4.5 " ") xtitle("") ytitle("Estimated effect of information")

